import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook"
For this exercise, we have written the following code to load the stock dataset built into plotly express.
# Load the sample stocks table bundled with plotly express.
stocks = px.data.stocks()
# Preview the first rows to see which columns are available.
stocks.head(n=5)
Select a stock and create a suitable plot for it. Make sure the plot is readable and shows the relevant information, such as dates and values.
# Facebook stock
# x holds the dates and y the FB values taken from the stocks table
x = stocks['date']
y = stocks['FB']

fig, ax = plt.subplots(figsize=(24, 10))
ax.plot(
    x, y,
    color='orange', linestyle='dashdot', linewidth=1,
    marker='o', markerfacecolor='g',
)

# Title and axis labels
ax.set_title('FaceBook stock')
ax.set_xlabel('Date')
ax.set_ylabel('Stock value')

# Label every 10th observation. The positions are derived from the number of
# rows so that no tick can fall outside the plotted range.
# NOTE(review): assumes the date column holds strings, which matplotlib places
# at integer positions 0..len(x)-1 - confirm against px.data.stocks().
ax.set_xticks(list(range(0, len(x), 10)))

# Grid lines make the values easier to read off
ax.grid()
plt.show()
You've already plotted data for one stock. It is also possible to plot several stocks in one figure to support comparison.
To distinguish the different lines, customise the line styles, markers, and colors, and add a legend to the plot.
# Import patches, used to build the legend handles
import matplotlib.patches as mpatches

# Shared x values and one y series per stock column
x = stocks['date']
yGOOG = stocks['GOOG']
yAAPL = stocks['AAPL']
yAMZN = stocks['AMZN']
yFB = stocks['FB']
yNFLX = stocks['NFLX']
yMSFT = stocks['MSFT']

# One row per stock: (series, line colour, legend label). The lines and the
# legend are both built from this table so their colours cannot drift apart.
stock_lines = [
    (yGOOG, '#6495ED', 'GOOG stock'),
    (yAAPL, 'pink', 'AAPL stock'),
    (yAMZN, 'orange', 'AMZN stock'),
    (yFB, 'red', 'FB stock'),
    (yNFLX, 'purple', 'NFLX stock'),
    (yMSFT, 'blue', 'MSFT stock'),
]

# Create the figure and draw one line per stock
fig, ax = plt.subplots(figsize=(24, 10))
for series, line_color, _ in stock_lines:
    ax.plot(x, series, color=line_color, marker='o', markersize=4, markerfacecolor='black')

# Title and axis labels
ax.set_title('All stock values')
ax.set_xlabel('Date')
ax.set_ylabel('Stock value')

# Label every 10th observation. The positions are derived from the number of
# rows so that no tick can fall outside the plotted range.
# NOTE(review): assumes the date column holds strings, which matplotlib places
# at integer positions 0..len(x)-1 - confirm against px.data.stocks().
ax.set_xticks(list(range(0, len(x), 10)))

# Legend: one coloured patch per stock, in plotting order
legend_handles = [
    mpatches.Patch(color=line_color, label=legend_label)
    for _, line_color, legend_label in stock_lines
]
ax.legend(handles=legend_handles)

# Grid lines for readability
ax.grid()
First, load the tips dataset
# Fetch seaborn's example "tips" dataset.
tips = sns.load_dataset(name='tips')
# Preview the first rows to see which columns are available.
tips.head(n=5)
Let's explore this dataset. Pose a question and create a plot that helps you answer it.
Some possible questions:
# Difference between male and female
# Scatter of tip against total bill, with one colour and one regression line
# per sex. lmplot adds the hue legend itself (legend=True by default), so
# add_legend() is not called again; a second call would draw another legend.
figure = sns.lmplot(data=tips, x='total_bill', y='tip', hue='sex', fit_reg=True)
# Judging by the regression lines in the figure, there is not much difference
# between male and female.
# A statistical test is needed before drawing a valid conclusion.
# Add a column with the tip expressed as a fraction of the total bill
tips['relative'] = tips['tip'].div(tips['total_bill'])

# Question: are relative tips higher on weekends?
# Swarm plot of the relative tip for each day
sns.catplot(x='day', y='relative', kind='swarm', data=tips)
# Observation from the plot: Sunday shows more extreme values, but there is no
# clear difference between the days

# Question: does the relative tip differ between lunch and dinner? On different days?
# Box plot per day, split by meal time
sns.catplot(x='day', y='relative', hue='time', kind='box', data=tips)
# Observation from the plot: only Friday allows a real lunch/dinner comparison,
# and there the lunch tips appear higher
# A proper statistical analysis is needed for an accurate answer

# Preview the table, now including the new 'relative' column
tips.head(n=5)
Redo the above exercises (challenges 2 & 3) with plotly express. Create diagrams which you can interact with.
Hints:
# Stocks dataset (exercise 2), redone as an interactive plotly express chart
dfstocks = px.data.stocks()
# Columns to draw, one line each, against the shared date column
stock_columns = ['GOOG', 'AAPL', 'AMZN', 'FB', 'NFLX', 'MSFT']
fig = px.line(data_frame=dfstocks, x='date', y=stock_columns)
fig.show()
# Tips dataset (exercise 3), redone as interactive plotly express charts

# Difference male/female: scatter coloured by sex with an OLS trendline each
fig = px.scatter(
    data_frame=tips,
    x='total_bill',
    y='tip',
    color='sex',
    trendline='ols',
)
fig.show()

# Question: are relative tips higher on weekends?
# Box plot of the relative tip per day
figurebox = px.box(data_frame=tips, x='day', y='relative')
figurebox.show()

# Question: does the relative tip differ between lunch and dinner? On different days?
# Same box plot, split by meal time through colour
figurebox2 = px.box(data_frame=tips, x='day', y='relative', color='time')
figurebox2.show()
Recreate the barplot below that shows the population of different continents for the year 2007.
Hints:
# Load the gapminder data and keep only the rows for 2007
df = px.data.gapminder()
df.head()
df2007 = df[df['year'] == 2007]

# Total population per continent. Only 'pop' is aggregated: summing the whole
# frame would also add up 'year' and any other column, which is meaningless
# and, on recent pandas versions, concatenates non-numeric columns.
# The double brackets keep the result a DataFrame indexed by continent.
df2007continent = df2007.groupby('continent')[['pop']].sum()
df2007continent.head()

# Horizontal bar chart with one bar and one colour per continent. The continent
# index is passed explicitly as the y-axis rather than relying on the
# implicit fallback.
figurebar = px.bar(
    df2007continent,
    x='pop',
    y=df2007continent.index,
    orientation='h',
    color=df2007continent.index,
)
# Order the bars by population
figurebar.update_layout(yaxis={'categoryorder': 'total ascending'})
figurebar.show()